# encoding: utf-8
import os
import codecs

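# Record format (one musician per line, fields separated by '|'):
#   ID|name|full name (first, middle, last)|translated name|full translated name|yyyymmdd-[yyyymmdd]|country|period|title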

def get_id_from_full_name(full_name):
    """Derive a record id from a full name.

    The name is lower-cased and every space character is replaced with an
    underscore; no other characters are altered.
    """
    lowered = full_name.lower()
    # Splitting on a single space and re-joining keeps runs of spaces as
    # runs of underscores, exactly like a character-for-character replace.
    return '_'.join(lowered.split(' '))

def parse_musician_line_data(line):
    """Split one line of the musician data file into its nine fields.

    Returns the list of fields, or None when the line starts with '#',
    is empty, or does not contain exactly nine '|'-separated fields.
    When the first field (the id) is empty it is filled in from the third
    field (the full name) via get_id_from_full_name.
    """
    is_comment = line.startswith(u'#')
    if is_comment or not line:
        return None

    fields = line.split('|')
    if len(fields) == 9:
        if not fields[0]:
            # No explicit id on this line: derive one from the full name.
            fields[0] = get_id_from_full_name(fields[2])
        return fields
    return None

def main():
    """Scan the musician data file and print each musician's image files.

    Reads ``../static/data/musician.txt`` as UTF-8 (the path is relative to
    the current working directory) and parses every line with
    ``parse_musician_line_data``. For each parsed record whose directory
    ``../static/data/musician_image/<id>`` exists, the directory listing is
    printed.
    """
    # Parsed records are accumulated here; nothing reads the list yet.
    musicians = []
    # The context manager closes the file even if parsing or listing raises.
    with codecs.open('../static/data/musician.txt', 'r', 'utf-8') as f:
        for line in f:
            m = parse_musician_line_data(line.rstrip(u'\r\n'))
            if not m:
                # Comment, empty, or malformed line.
                continue

            m_image_dir = '../static/data/musician_image/' + m[0]
            if os.path.isdir(m_image_dir):
                # The parenthesized single-argument form behaves the same as
                # the Python 2 print statement and as the Python 3 function.
                print(os.listdir(m_image_dir))
            musicians.append(m)
#    for m in musicians:
#        print m

# Run the scan only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

